#########################################################################################################################################
#########################################################################################################################################

# dvs: a character vector of dependent variables
# ivs: a character vector of independent variables
# modename: a character  of model names
# n_sim: number of simulations
# var: variable to compute marginal effects
# data: the dataset used for model fit

# return as a list including :1) a ggplot obj;(2) estimated coefficents and robust SE

# For example:
############### fit fixed-effect model with robust standar errors by group
# return as a ggplot objective

#p <- FE_rb_plot(dvs = c("s_polempowerment", "fs_polempowerment", "f2s2_polempowerment",
#                        "f3s3_polempowerment", "f4s4_polempowerment", "f5s5_polempowerment",
#                        "f10s10_polempowerment", "f15s15_polempowerment"),
#                ivs = c("warDummy", "l_polempowerment", "s_polity2", "l_polity2", 
#                        "s_lpec", "l_lpec",  "l_neighborpolempowerment"),
#                modname = c("fe_polem0", "fe_polem1", "fe_polem2", "fe_polem3",
#                            "fe_polem4", "fe_polem5", "fe_polem10", "fe_polem15"),
#                n_sim = 1000,
#                var = "warDummy",
#                data = data)


#p$plot + scale_y_discrete(labels = c("15-year","10-year", "5-year",
#                                     "4-year", "3-year", "2-year", "1-year", "Current")) + 
#       labs(title = "Existential War and Fertility Rates",
#            x = "Average marginal effects")

#p$Results[[2]]


## define a function to run fixed-effect model with standard errors clustered by group
FE_rb_plot <- function(dvs, ivs, modname, n_sim = 1000, var, data){
       # load R pacakges      
       library(plm); library(lmtest);library(multiwayvcov)
       library(arm); library(purrr); library(dplyr)
       library(ggplot2); library(ggthemes); library(viridis)
       library(ggridges); library(purrr); library(dplyr)
       library(clubSandwich)
       #empty list to store marginal results       
       marginal <- list()     
       #empty list to store coefficents
       Results <- list()
       # empty list to store model fit
       Fits <- list()
       
       # for loop to run all models
       
       for (i in seq_along(modname)){
              # make a model formula
              f <- as.formula(paste(dvs[i], paste(ivs, collapse=" + "), sep=" ~ "))
              # Loading the required libraries
              #https://blog.theleapjournal.org/2016/06/sophisticated-clustered-standard-errors.html
              #Clustered standard errors can be computed in R, using the vcovHC() function from plm package.
              #vcovHC.plm() estimates the robust covariance matrix for panel data models.
              #The function serves as an argument to other functions such as coeftest(), 
              #waldtest() and other methods in the lmtest package. Clustering is achieved by the cluster argument,
              #that allows clustering on either group or time. The type argument allows estimating standard errors
              #by allowing for heteroskedasticity across groups. Recently, the plm package introduced the small 
              #sample correction as an option to the "type" argument of vcovHC.plm() function. This is switched on by specifying type="sss".
              
              # fit a fixed effect model at country-year-level
              fit <- plm(f, data = data, model = "within", index = c("ccode", "year"))
              Fits[[modname[i]]] <- fit
              # get the robust standard by group for fixed effect
              rb_coef <- coeftest(fit, vcov=vcovHC(fit, type="sss", cluster="group"))
                            # store results
              Results[[modname[i]]] <- rb_coef
              #extract the variance-covariance matrix
              vc <- vcovHC(fit, type="sss", cluster="group")
              #extract the coefficient 
              pars <- rb_coef[, "Estimate"]
              set.seed(321)
              #extrat the model data
              mydata2 <- model.matrix(f, data)
              #remove constant 1
              mydata2 <- as.data.frame(mydata2[,c(2: (length(pars) +1))])# remove constant 
              
              ### simulation based on observed values
              # simulations based on coefficient and variance-covariance
              draw <- mvrnorm(n_sim, data.matrix(pars), data.matrix(vc))
              #copy dataset for marginal effect
              data0 <- data1 <- mydata2
              #set the scenario for marginal effect
              data0[, var] <- 0
              data1[, var] <- 1
              
              #empty containers
              X1 <- as.matrix(data0)
              X2 <- as.matrix(data1)
              #store marginal effect in the list
              marginal[[modname[i]]] <- apply(apply(X2, 1, function (x) draw %*% x) - 
                                                     apply(X1, 1, function (x) draw %*% x), 1, mean) #1 indicates row, 2= columns
       }
       #convert the list to a data frame for plotting
       df_plot <- map(marginal, data.frame) %>%
              map2_df(., names(.), ~mutate(.x, type = .y))
       #rename first variable
       names(df_plot)[1] <- "value"
       
       ##change the order of the variable
       df_plot$type <- factor(df_plot$type, levels = rev(modname))
       
       #### Plotting
       fig <- ggplot(df_plot, aes(x = value, y = type, height=..density.., fill = type)) +
              geom_density_ridges(col = "grey70", scale = 1.2, show.legend = F) +
              scale_fill_viridis(discrete = TRUE) +
              geom_vline(xintercept = 0, colour = gray(1/2), lty = 2) +
              theme_ridges(font_size = 11, grid = F, center_axis_labels = T) +
              theme(axis.title.y = element_blank(), 
                    text = element_text(size=11),
                    plot.title = element_text(hjust = .5, size = 11, face = "bold")) 
       ## make a return obj
       p <- list(fig, Results, df_plot, Fits)
       #rename list
       names(p) <- c("plot", "Results", " df_plot", "Fits")
       return(p)
       
}

# Note for R and Stata: when usign plm, the model doesn't return a signle 
# intercept as the Stata would report. This is because in fixed effect model using 
# plm, it has many "inctercept" for each group. you can recover the constant in Stata
# by using 'mean(fixef(<yourmodel>))' in R.These two should be identical
# If you include time in stata as one predictor, you need to change the time variable
# in plm to a different one so plm can return the estimate of "time just as the Stata will do.
# The question is do we want to include "time as well in plm?
#Croissant, Yves, and Giovanni Millo. "Panel data econometrics in R: The plm package." Journal of statistical software 27.2 (2008): 1-43.



## R function to plot the coefficients for multiple models

coef_rb_plot <- function(modResults, vars = NULL, digits = 7){
       
       library(multiwayvcov)
       library(lmtest)
       library(dplyr)
       require(ggplot2)
       library(gridExtra)
       library(tidyr)
       library(stringr)
       library(purrr)
       
       modSumm = lapply(modResults, 
                        function(x) FUN= x[,c('Estimate','Std. Error','Pr(>|t|)')])
       
       
       noModels <- length(modSumm)
       
       modcoef <- lapply(modResults, function(x) FUN =round(x, digits))  
       modelcoef = lapply(modcoef, function(x) FUN = x[, "Estimate"])
       modelse = lapply(modcoef, function(x) FUN = x[, "Std. Error"])
       pvals = lapply(modcoef, function(x) FUN = x[, "Pr(>|t|)"])
       varnames = lapply(modResults, function(x) FUN = rownames(x))
       
       
       dfplot <- list()
       for (i in 1:length(modelcoef)){
              ad = 0.5 / modelse[[i]]
              modelcoef[[i]] = modelcoef[[i]]* ad
              modelse[[i]] = modelse[[i]]*ad 
              
              ylo95 =modelcoef[[i]] - 1.96*(modelse[[i]])
              yhi95 =modelcoef[[i]] + 1.96*(modelse[[i]])
              ylo90 =modelcoef[[i]] - 1.64*(modelse[[i]])
              yhi90 =modelcoef[[i]] + 1.64*(modelse[[i]])
              dfplot[[i]] = data.frame(varnames[[i]], modelcoef[[i]], modelse[[i]], ylo95, yhi95,
                                       ylo90, yhi90,
                                       Model = names(modSumm)[i])
       }
       
       
       coefficientdata = do.call(rbind,dfplot)
       colnames(coefficientdata) <- c("Variables", "Coefficients", "S.E",
                                      "Low95CI", "High95CI", 
                                      "Low90CI", "High90CI",
                                      "Model.Name")
       
       df <- coefficientdata %>%
              dplyr::mutate(Variables = as.character(Variables)) %>%
              dplyr::arrange(desc(Variables))
       
       df <- df[grep(paste(vars, collapse = "|"), df$Variables),]
       #re-order the variables: if factor, the order of factor levels is used; if character, an alphabetical order ist used
       df$Variables <- factor(df$Variables, levels = vars)
       
       p = ggplot(df, aes(color = Model.Name)) + 
              geom_hline(yintercept = 0, lty = 2) +
              geom_linerange(aes(x = Variables, ymin = Low90CI,
                                 ymax = High90CI),
                             lwd = .5, position = position_dodge(width = 1/2)) +
              geom_pointrange(aes(x = Variables, y = Coefficients, ymin = Low95CI,
                                  ymax = High95CI),
                              lwd = 1, position = position_dodge(width = 1/2),
                              fill = "WHITE") +
              coord_flip() + 
              scale_linetype_discrete() + 
              theme_bw() + xlab("") + ylab("Rescaled Coefficient") + 
              theme(legend.position="bottom",
                    legend.title=element_blank(),
                    legend.text = element_text(colour="blue", size=12, 
                                               face="bold"),
                    axis.text= element_text(size=12),
                    text = element_text(size=12),
                    plot.title = element_text(hjust = .5, size = 12, face = "bold"))
       
       return(p)   
}
#################################################################################################################
## for group level analysis
## define a function to run fixed-effect model with standard errors clustered by group
MLM_arg_plot <- function(dvs, ivs, modname, n_sim = 1000, var, data){
       # load R pacakges      
       library(lme4)
       library(arm); library(purrr)
       library(ggplot2); library(ggthemes); library(viridis)
       library(ggridges); library(purrr); library(dplyr)
       #empty list to store marginal results       
       marginal <- list()     
       # empty list to store model fit
       Fits <- list()
       # for loop to run all models
       
       for (i in seq_along(modname)){
              # make a model formula
              f <- as.formula(paste(paste(dvs[i], paste(ivs, collapse=" + "), sep=" ~ "), "(1 | countries_gwid)", sep = " + "))
              # set REML=FALSE option, it will use the optimization of the log-likelihood
              fit <- lmer(f, REML=FALSE, data = data)
              Fits[[modname[i]]] <- fit
              
              #extrat the model data
              mydata2 <- as.data.frame(model.matrix(fit))
              
              ### simulation based on observed values
              set.seed(321)
              draw <- sim(fit, n.sims=1000)@fixef
              #copy dataset for marginal effect
              data0 <- data1 <- mydata2
              #set the scenario for marginal effect
              data0[, var] <- 0
              data1[, var] <- 1
              
              #empty containers
              X1 <- as.matrix(data0)
              X2 <- as.matrix(data1)
              #store marginal effect in the list
              marginal[[modname[i]]] <- apply(apply(X2, 1, function (x) draw %*% x) - 
                                                     apply(X1, 1, function (x) draw %*% x), 1, mean) #1 indicates row, 2= columns
       }
       #convert the list to a data frame for plotting
       df_plot <- map(marginal, data.frame) %>%
              map2_df(., names(.), ~mutate(.x, type = .y))
       #rename first variable
       names(df_plot)[1] <- "value"
       
       ##change the order of the variable
       df_plot$type <- factor(df_plot$type, levels = rev(modname))
       
       #### Plotting
       fig <- ggplot(df_plot, aes(x = value, y = type, height=..density.., fill = type)) +
              geom_density_ridges(col = "grey70", scale = 1.2, show.legend = F) +
              scale_fill_viridis(discrete = TRUE) +
              geom_vline(xintercept = 0, colour = gray(1/2), lty = 2) +
              theme_ridges(font_size = 13, grid = F, center_axis_labels = T) +
              theme(axis.title.y = element_blank(), 
                    text = element_text(size=13),
                    plot.title = element_text(hjust = .5, size = 15, face = "bold")) 
       ## make a return obj
       p <- list(fig, df_plot, Fits)
       #rename list
       names(p) <- c("plot", " df_plot", "Fits")
       return(p)
       
}

## define a function to run fixed-effect model with standard errors clustered by group
MLMs_fit <- function(dvs, ivs, modname, data){
       # load R pacakges      
       library(lme4)
       library(arm); library(purrr)
       library(ggplot2); library(ggthemes); library(viridis)
       library(ggridges); library(purrr); library(dplyr)
       # empty list to store model fit
       Fits <- list()
       # for loop to run all models
       
       for (i in seq_along(modname)){
              # make a model formula
              f <- as.formula(paste(paste(dvs[i], paste(ivs, collapse=" + "), sep=" ~ "), "(1 | countries_gwid)", sep = " + "))
              # set REML=FALSE option, it will use the optimization of the log-likelihood
              fit <- lmer(f, REML=FALSE, data = data)
              Fits[[modname[i]]] <- fit
              
       }
       return(Fits)
       
}


MLM_maginal_plot <- function(Fits, n_sim = 1000, var, data){
       # load R pacakges      
       library(lme4)
       library(arm); library(purrr)
       library(ggplot2); library(ggthemes); library(viridis)
       library(ggridges); library(purrr); library(dplyr)
       #empty list to store marginal results       
       marginal <- list()     
       
       for (i in seq_along(Fits)){
              
              #extrat the model data
              mydata2 <- as.data.frame(model.matrix(Fits[[i]]))
              
              ### simulation based on observed values
              set.seed(321)
              draw <- sim(Fits[[i]], n.sims=1000)@fixef
              #copy dataset for marginal effect
              data0 <- data1 <- mydata2
              #set the scenario for marginal effect
              data0[, var] <- 0
              data1[, var] <- 1
              
              #empty containers
              X1 <- as.matrix(data0)
              X2 <- as.matrix(data1)
              modname <- names(Fits)
              #store marginal effect in the list
              marginal[[modname[i]]] <- apply(apply(X2, 1, function (x) draw %*% x) - 
                                                     apply(X1, 1, function (x) draw %*% x), 1, mean) #1 indicates row, 2= columns
       }
       #convert the list to a data frame for plotting
       df_plot <- map(marginal, data.frame) %>%
              map2_df(., names(.), ~mutate(.x, type = .y))
       #rename first variable
       names(df_plot)[1] <- "value"
       
       ##change the order of the variable
       df_plot$type <- factor(df_plot$type, levels = rev(modname))
       
       #### Plotting
       fig <- ggplot(df_plot, aes(x = value, y = type, height=..density.., fill = type)) +
              geom_density_ridges(col = "grey70", scale = 1.2, show.legend = F) +
              scale_fill_viridis(discrete = TRUE) +
              geom_vline(xintercept = 0, colour = gray(1/2), lty = 2) +
              theme_ridges(font_size = 13, grid = F, center_axis_labels = T) +
              theme(axis.title.y = element_blank(), 
                    text = element_text(size=13),
                    plot.title = element_text(hjust = .5, size = 15, face = "bold")) 
       ## make a return obj
       p <- list(fig, df_plot)
       #rename list
       names(p) <- c("plot", " df_plot")
       return(p)
       
}

## define a function to run fixed-effect model with standard errors clustered by group
FEgroup_rb_plot <- function(dvs, ivs, modname, n_sim = 1000, var, data){
       # load R pacakges      
       library(plm); library(lmtest);library(multiwayvcov)
       library(arm); library(purrr); library(dplyr)
       library(ggplot2); library(ggthemes); library(viridis)
       library(ggridges); library(purrr); library(dplyr)
       library(clubSandwich)
       #empty list to store marginal results       
       marginal <- list()     
       #empty list to store coefficents
       Results <- list()
       # empty list to store model fit
       Fits <- list()
       
       # for loop to run all models
       
       for (i in seq_along(modname)){
              # make a model formula
              f <- as.formula(paste(dvs[i], paste(ivs, collapse=" + "), sep=" ~ "))
              # Loading the required libraries
              #https://blog.theleapjournal.org/2016/06/sophisticated-clustered-standard-errors.html
              #Clustered standard errors can be computed in R, using the vcovHC() function from plm package.
              #vcovHC.plm() estimates the robust covariance matrix for panel data models.
              #The function serves as an argument to other functions such as coeftest(), 
              #waldtest() and other methods in the lmtest package. Clustering is achieved by the cluster argument,
              #that allows clustering on either group or time. The type argument allows estimating standard errors
              #by allowing for heteroskedasticity across groups. Recently, the plm package introduced the small 
              #sample correction as an option to the "type" argument of vcovHC.plm() function. This is switched on by specifying type="sss".
              
              # fit a fixed effect model at country-year-level
              fit <- plm(f, data = data, model = "within", index = c("gwgroupid", "year"))
              Fits[[modname[i]]] <- fit
              # get the robust standard by group for fixed effect
              rb_coef <- coef_test(fit,  vcov = "CR1", cluster = data$countries_gwid)
              # store results
              Results[[modname[i]]] <- rb_coef
              #extract the variance-covariance matrix
              vc <- vcovCR(fit, cluster = data$countries_gwid, type = "CR1")
              #extract the coefficient 
              pars <- rb_coef[, "beta"]
              
              #extrat the model data
              mydata2 <- model.matrix(f, data)
              #remove constant 1
              mydata2 <- as.data.frame(mydata2[,c(2: (length(pars) +1))])# remove constant 
              
              ### simulation based on observed values
              # simulations based on coefficient and variance-covariance
              set.seed(321)
              draw <- mvrnorm(n_sim, data.matrix(pars), data.matrix(vc))
              #copy dataset for marginal effect
              data0 <- data1 <- mydata2
              #set the scenario for marginal effect
              data0[, var] <- 0
              data1[, var] <- 1
              
              #empty containers
              X1 <- as.matrix(data0)
              X2 <- as.matrix(data1)
              #store marginal effect in the list
              marginal[[modname[i]]] <- apply(apply(X2, 1, function (x) draw %*% x) - 
                                                     apply(X1, 1, function (x) draw %*% x), 1, mean) #1 indicates row, 2= columns
       }
       #convert the list to a data frame for plotting
       df_plot <- map(marginal, data.frame) %>%
              map2_df(., names(.), ~mutate(.x, type = .y))
       #rename first variable
       names(df_plot)[1] <- "value"
       
       ##change the order of the variable
       df_plot$type <- factor(df_plot$type, levels = rev(modname))
       
       #### Plotting
       fig <- ggplot(df_plot, aes(x = value, y = type, height=..density.., fill = type)) +
              geom_density_ridges(col = "grey70", scale = 1.2, show.legend = F) +
              scale_fill_viridis(discrete = TRUE) +
              geom_vline(xintercept = 0, colour = gray(1/2), lty = 2) +
              theme_ridges(font_size = 13, grid = F, center_axis_labels = T) +
              theme(axis.title.y = element_blank(), 
                    text = element_text(size=13),
                    plot.title = element_text(hjust = .5, size = 15, face = "bold")) 
       ## make a return obj
       p <- list(fig, Results, df_plot, Fits)
       #rename list
       names(p) <- c("plot", "Results", " df_plot", "Fits")
       return(p)
       
}


#Interaction between binary variables

## define a function to run fixed-effect model with standard errors clustered by group
FE_rb_interacbinary <- function(dvs, ivs, modname, n_sim = 1000, bvarname1, cvarname2,
                                cval1, cval2, data){
        # load R pacakges      
        library(plm); library(lmtest);library(multiwayvcov)
        library(arm); library(purrr); library(dplyr)
        library(ggplot2); library(ggthemes); library(viridis)
        library(ggridges); library(purrr); library(dplyr)
        library(clubSandwich)
        #empty list to store marginal results       
        marginal <- list()     
        #empty list to store coefficents
        Results <- list()
        # empty list to store model fit
        Fits <- list()
        
        # for loop to run all models
        
        for (i in seq_along(modname)){
                # make a model formula
                f <- as.formula(paste(dvs[i], paste(ivs, collapse=" + "), sep=" ~ "))
                # Loading the required libraries
                #https://blog.theleapjournal.org/2016/06/sophisticated-clustered-standard-errors.html
                #Clustered standard errors can be computed in R, using the vcovHC() function from plm package.
                #vcovHC.plm() estimates the robust covariance matrix for panel data models.
                #The function serves as an argument to other functions such as coeftest(), 
                #waldtest() and other methods in the lmtest package. Clustering is achieved by the cluster argument,
                #that allows clustering on either group or time. The type argument allows estimating standard errors
                #by allowing for heteroskedasticity across groups. Recently, the plm package introduced the small 
                #sample correction as an option to the "type" argument of vcovHC.plm() function. This is switched on by specifying type="sss".
                
                # fit a fixed effect model at country-year-level
                fit <- plm(f, data = data, model = "within", index = c("ccode", "year"))
                Fits[[modname[i]]] <- fit
                # get the robust standard by group for fixed effect
                rb_coef <- coeftest(fit, vcov=vcovHC(fit, type="sss", cluster="group"))
                # store results
                Results[[modname[i]]] <- rb_coef
                #extract the variance-covariance matrix
                vc <- vcovHC(fit, type="sss", cluster="group")
                #extract the coefficient 
                pars <- rb_coef[, "Estimate"]
                set.seed(321)
                #extrat the model data
                mydata2 <- model.matrix(f, data)
                #remove constant 1
                mydata2 <- as.data.frame(mydata2[,c(2: (length(pars) +1))])# remove constant 
                
                ### simulation based on observed values
                # simulations based on coefficient and variance-covariance
                draw <- mvrnorm(n_sim, data.matrix(pars), data.matrix(vc))
                #make copy of the data
                X1 <- X2 <- X3 <-X4 <- mydata2
                
                ##set simulation
                df <- array(NA, c(2, 4))
                
                
                X1[, bvarname1] = 0 #no sv
                X1[, cvarname2] = cval1 
                X1[, paste(bvarname1, cvarname2,sep = ":")] = 0*cval1
                
                X2[,bvarname1] = 0 #no sv
                X2[,cvarname2] = cval2
                X2[, paste(bvarname1, cvarname2, sep = ":")] = 0*cval2
                
                fd_no <- apply(apply(X2, 1, function (x) draw %*% x) - 
                                       apply(X1, 1, function (x) draw %*% x), 1, mean) #1 indicates row, 2= columns
                
                X3[, bvarname1] = 1
                X3[, cvarname2] = cval1
                X3[, paste(bvarname1, cvarname2, sep = ":")] = 1*cval1
                
                X4[,bvarname1] = 1
                X4[,cvarname2] = cval2
                X4[, paste(bvarname1, cvarname2, sep = ":")] = 1*cval2
                
                fd_yes <- apply(apply(X4, 1, function (x) draw %*% x) - 
                                        apply(X3, 1, function (x) draw %*% x), 1, mean) #1 indicates row, 2= columns
                
                df[1, 1] <- 0
                df[1, 2] <- mean(fd_no)
                df[1, 3:4] <- quantile(fd_no, probs = c(.025,.975))
                df[2, 1] <- 1
                df[2, 2] <- mean(fd_yes)
                df[2, 3:4] <- quantile(fd_yes, probs = c(.025,.975))
                
                marginal[[modname[i]]] <- df
        }
        
        #convert the list to a data frame for plotting
        df_plot <- map(marginal, data.frame) %>%
                map2_df(., names(.), ~mutate(.x, type = .y))
        
        
        colnames(df_plot)[1:4] <- c("value", "mean", "lo", "hi")   
        ##change the order of the variable
        df_plot$type <- factor(df_plot$type, levels = modname)
        levels(df_plot$type) <-  c("Current", "1-year", "2-year","3-year",
                                   "4-year", "5-year", "10-year")
        
        fig <- ggplot(df_plot, aes(x=value, y=mean)) +
                geom_hline(aes(yintercept=0), linetype=2, color = "black") + 
                geom_point(size=2) + 
                geom_linerange(aes(ymin=lo, ymax=hi),alpha = 1, size = 1) + 
                theme_bw() + scale_x_discrete(limits = c(0,1),labels = c("No Sexual Violence","Sexual Violence")) + 
                xlab("") + ylab("")+ facet_grid(cols = vars(type))+
                theme(legend.position="none",
                      legend.title=element_blank(),
                      axis.text = element_text(size=10),
                      text = element_text(size=10),
                      strip.text.x = element_text(size = 10, color='white', angle=0, hjust=.5),
                      strip.background = element_rect(fill = "#525252", color='#525252'),
                      plot.title = element_text(hjust = .5, size = 14, face = "bold")) + 
                coord_flip()
        
        ## make a return obj
        p <- list(fig, Results, df_plot, Fits)
        #rename list
        names(p) <- c("plot", "Results", " df_plot", "Fits")
        return(p)
        
}
